cap log close
clear
clear matrix
set more off

/***********************************************************************************
***	This program examines factors affecting the magnitude of rotation group bias ***
*** in the Current Population Survey as presented in                             ***
*** Krueger, Mas, and Niu (Review of Economics and Statistics, forthcoming).     ***
************************************************************************************/

// Change to the directory that contains datasets
local datadir "C:"
// Change to the directory that contains output files
local outdir "C:"

// log file
log using `outdir'/RGB_CPS_codes_analysis_REStat.log, replace

// Set seed for random number generator
set seed 1234567890

/***********************************************************************************
	Figure 4: Multiplicative indices of RGB by unemployment duration. 
	(Fraction of labor force)
************************************************************************************/

// Read in necessary information by year, and then append annual samples
local vars year monis age unemp lf unempdur weight

use `vars' using `datadir'/base2014ax.dta if age >= 16 & lf < ., clear
drop age
save `outdir'/temp.dta, replace

foreach y of numlist 1976/2013 {
	use `vars' using `datadir'/base`y'ax.dta if age >= 16 & lf < ., clear
	drop age
	append using `outdir'/temp.dta
	save `outdir'/temp.dta, replace
}

// Generate four categories of unemployed workers by unemployment duration
replace unempdur = . if unempdur < 0	// Valid entry for duration is greater than 0
replace unempdur = . if unemp != 1
gen unemp1 = (unempdur <  4)
gen unemp2 = (unempdur >= 4  & unempdur <= 14)
gen unemp3 = (unempdur >= 15 & unempdur <= 26)
gen unemp4 = (unempdur >= 27 & unempdur < .)

// Count labor force participants and unemployed workers by duration
collapse (sum) lf unemp1-unemp4 [pw=weight], by(year monis)

erase `outdir'/temp.dta

// Calculate unemployed workers by duration as fractions of the labor force
gen unemprt1 = unemp1/lf
gen unemprt2 = unemp2/lf
gen unemprt3 = unemp3/lf
gen unemprt4 = unemp4/lf

foreach x of varlist unemprt1-unemprt4 {
	// Calculate average across 8 rotation groups in each year
	bysort year: egen double `x'_avg = mean(`x')
	// Calculate multiplicative indices
	gen `x'_ind = `x'/`x'_avg*100
}

keep year monis *_ind *_avg

// Reshape the data from (year*MIS-by-measure) to (year*MIS*measure)

rename unemprt1_ind unemprt_ind1
rename unemprt2_ind unemprt_ind2
rename unemprt3_ind unemprt_ind3
rename unemprt4_ind unemprt_ind4

rename unemprt1_avg unemprt_avg1
rename unemprt2_avg unemprt_avg2
rename unemprt3_avg unemprt_avg3
rename unemprt4_avg unemprt_avg4

reshape long unemprt_ind unemprt_avg, i(year monis) j(measure)

// Estimate the slope of multiplicative indices as a measure of RGB
gen slope = .
sum year
local minyr = r(min)
local maxyr = r(max)
sum measure
local minmea = r(min)
local maxmea = r(max)
foreach y of numlist `minyr'/`maxyr' {
	foreach i of numlist `minmea'/`maxmea' {
		quietly reg unemprt_ind monis if year == `y' & measure == `i'
		replace slope = _b[monis] if year == `y' & measure == `i'
	}
}

// Reshape the data from (year*MIS*measure) to (year*measure-by-MIS)
reshape wide unemprt_ind, i(year measure unemprt_avg slope) j(monis)

#delimit ;
label define measure
	1 "% <4wks"
	2 "% 4-14wks"
	3 "% 15-26wks"
	4 "% >=27wks";
#delimit cr
label values measure measure

foreach i of numlist 1/8 {
	label var unemprt_ind`i' "Multiplicative indices, MIS`i'"
}
label var unemprt_avg "Average over MIS"
label var slope "Slope of multiplicative indices"

compress
sort measure year
order measure year unemprt_ind1-unemprt_ind8 unemprt_avg slope

save `outdir'/RGB_CPS_indices_duration.dta, replace

// Plot RGB of unemployment rate by duration.
// Also include fitted line with time trend and level shift.
gen time = year-1993
gen post93 = (year >= 1994)
gen yhat = .

sum measure
local minmea = r(min)
local maxmea = r(max)
foreach i of numlist `minmea'/`maxmea' {
	reg slope time post93 if measure == `i'
	predict yhat`i' if measure == `i', xb
	replace yhat = yhat`i' if measure == `i'
}

#delimit ;
twoway (line slope yhat year, mc(edkblue)),
	by(measure,iscale(*1.2) rows(2) legend(off) compact note("") title("Magnitude of Bias of Unemployment Duration, 1976-2014",size(med)) subtitle("Fraction of the labor force"))
	xtitle("Year")
	ytitle("Magnitude of Bias")
	xlabel(1975(10)2015)
	ylabel(-5(1)2)
	ylabel(,labs(small))
	xlabel(,labs(small))
	saving(`outdir'/RGB_CPS_indices_duration.gph, replace);
#delimit cr
graph export `outdir'/RGB_CPS_indices_duration.pdf, replace


/***********************************************************************************
	Get data for analysis by subgroup.
	-> Unemployment questions (Table 3)
	-> Gender (Table B1)
	-> Imputation (Table B2)
	-> Proxy response (Table B4)
************************************************************************************/

// Extract necessary information and count by response

foreach y of numlist 1976/2014 {

	di "Year `y'"

	use `datadir'/base`y'ax.dta if age >= 16 & lf < ., clear
	
	gen popct = 1
	
	*********** Responses to labor force questions ***********
	
	* No search information between June and August 1995
	
	gen act_search = 0
	if (`y' >= 1994) & ~(`y' == 1995 & intmonth >= 6 & intmonth <= 8) {
		foreach var of varlist lkjob11-lkjob36 {
			replace act_search = 1 if act_search == 0 & (`var' >= 1  & `var' <= 9 )
		}
	}	
	
	if (`y' >= 1994) & ~(`y' == 1995 & intmonth >= 6 & intmonth <= 8) {
		gen layofflw_all = ((recall == 1 | recall6m == 1) & (availreclw == 1 | availreclwrsn == 1))
		gen wrklw_all = (wrklw != 1)
		gen lkjob4wk_all = (lkjob4wk == 1 & wrklw_all == 1)
		gen actsea_all = (act_search == 1 & lkjob4wk_all == 1)
		gen availjoblw_all = ((availjoblw == 1 | (availjoblwrsn == 1 | availjoblwrsn == 2)) & actsea_all == 1)
	}
	else if (`y' >= 1989 & `y' <= 1993) {
		gen layofflw_all = (!(actlw == 1 | wrklw == 1) & (absentlw == 6 | absentlw == 7) & (aval4job == 1 | whynotaval == 1 | whynotaval == 2))
		gen wrklw_all = (!(actlw == 1 | wrklw == 1))
		gen lkjob4wk_all = ((lkjob4wk == 1 | (whylkjob > 0 & whylkjob < .)) & wrklw_all == 1)
		gen actsea_all = .
		gen availjoblw_all = ((aval4job == 1 | whynotaval == 1 | whynotaval == 2) & lkjob4wk_all == 1)
	}
	else {
		gen layofflw_all = (!(actlw == 1 | wrklw == 1) & (absentlw == 6 | absentlw == 7) & (aval4joblw == 2 | whynotaval == 1 | whynotaval == 2))
		gen wrklw_all = (!(actlw == 1 | wrklw == 1))
		gen lkjob4wk_all = ((lkjob4wk == 1 | (whylkjob > 0 & whylkjob < .)) & wrklw_all == 1)
		gen actsea_all = .
		gen availjoblw_all = ((aval4joblw == 2 | whynotaval == 1 | whynotaval == 2) & lkjob4wk_all == 1)
	}
	
	*********** Unimputed labor force status ***********
	
	gen lf_unimp = .
	gen unemp_unimp = .
	if (`y' >= 1982) {
		replace lf_unimp = lf if pxmlr == 0
		replace unemp_unimp = unemp if pxmlr == 0
	}
	
	// Missing imputation flag between June 1995 and August 1995
	replace lf_unimp    = . if year == 1995 & intmonth >= 6 & intmonth <= 8
	replace unemp_unimp = . if year == 1995 & intmonth >= 6 & intmonth <= 8	
	
	*********** Output necessary variables ***********
	
	#delimit ;
	keep year monis weight popct unemp lf
		layofflw_all wrklw_all lkjob4wk_all actsea_all availjoblw_all
		female
		lf_unimp unemp_unimp
		lfproxy1;
	#delimit cr
	
	rename lfproxy1 lfproxy
	
	compress
	save `outdir'/temp`y'.dta, replace
}

// Append annual data
use `outdir'/temp2014.dta, clear
foreach y of numlist 1976/2013 {
	append using `outdir'/temp`y'.dta
}

// Save the combined data
compress
save `outdir'/data_by_group.dta, replace

// Erase annual data
foreach y of numlist 1976/2014 {
	erase `outdir'/temp`y'.dta
}

/***********************************************************************************
	Table 3: multiplicative indices of RGB by responses to unemployment questions.
************************************************************************************/

use `outdir'/data_by_group.dta, clear

// Calculate summary data by year
collapse (sum) popct layofflw_all wrklw_all lkjob4wk_all actsea_all availjoblw_all [pw=weight], by(year monis)

// Present count as fraction of the population
gen layofflw_pop    = layofflw_all  /popct*100
gen wrklw_pop       = wrklw_all     /popct*100
gen lkjob4wk_pop    = lkjob4wk_all  /popct*100
gen actsea_pop      = actsea_all    /popct*100
gen availjoblw_pop  = availjoblw_all/popct*100

foreach x of varlist layofflw_pop wrklw_pop lkjob4wk_pop actsea_pop availjoblw_pop {
	// Calculate average across 8 rotation groups by year
	bysort year: egen double `x'_avg = mean(`x')
	// Calculate multiplicative and additive indices
	gen `x'_ind = `x'/`x'_avg*100
}

// Reshape the data from (year*gender*MIS-by-measure) to (year*gender*MIS*measure)

keep year monis *_ind *_avg

rename layofflw_pop_ind   ind1
rename wrklw_pop_ind      ind2
rename lkjob4wk_pop_ind   ind3
rename actsea_pop_ind     ind4
rename availjoblw_pop_ind ind5

rename layofflw_pop_avg   avg1
rename wrklw_pop_avg      avg2
rename lkjob4wk_pop_avg   avg3
rename actsea_pop_avg     avg4
rename availjoblw_pop_avg avg5

// Reshape data from "year*MIS-by-measure" to "year*MIS*measure"
reshape long ind avg, i(year monis) j(measure)

// Calculate the magnitude of RGB
gen slope = .
sum year
local minyr = r(min)
local maxyr = r(max)
sum measure
local minmea = r(min)
local maxmea = r(max)
foreach y of numlist `minyr'/`maxyr' {
	foreach i of numlist `minmea'/`maxmea' {
		quietly sum ind if year == `y' & measure == `i'
		if (r(N) > 0) {
			quietly reg ind monis if year == `y' & measure == `i'
			replace slope = _b[monis] if year == `y' & measure == `i'
		}
	}
}

// Reshape the data from "year*MIS*measure" to "year*MIS-by-measure"
reshape wide ind, i(year measure avg slope) j(monis)

// Average over pre- and post-1994 period
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse ind1-ind8 avg slope, by(measure period)

// Organize data output
sort measure period
order measure period ind1-ind8 avg slope

#delimit ;
label define measure
	1 "Layoff-Pop"
	2 "Not working last week-Pop"
	3 "Looking for job for 4wks-Pop"
	4 "Active search method-Pop"
	5 "Available for job last week-Pop";
#delimit cr
label values measure measure

label define period 1 "1976-1993" 2 "1994-2014"
label values period period

foreach i of numlist 1/8 {
	label var ind`i' "Multiplicative indices, MIS`i'"
}
label var avg "Average over MIS"
label var slope "Magnitude of RGB"

compress
save `outdir'/RGB_CPS_indices_jobsearch.dta, replace


/***********************************************************************************
	Table A1: multiplicative indices of RGB by gender.
************************************************************************************/

use `outdir'/data_by_group.dta, clear

collapse (sum) lf unemp [pw=weight], by(year female monis)

// Calculate unemployment rate
gen unemprt = unemp/lf

// Calculate average across 8 rotation groups by year and gender
bysort year female: egen double unemprt_avg = mean(unemprt)
// Calculate multiplicative and additive indices
gen unemprt_ind = unemprt/unemprt_avg*100

keep year female monis unemprt_ind unemprt_avg

// Calculate slope of multiplicative indices
gen slope = .
quietly sum year
local minyr = r(min)
local maxyr = r(max)
foreach y of numlist `minyr'/`maxyr' {
	foreach j of numlist 0/1 {
		quietly count if year == `y' & female == `j'
		if (r(N) > 0) {
			quietly reg unemprt_ind monis if year == `y' & female == `j'
			replace slope = _b[monis] if year == `y' & female == `j'
		}
	}
}

// Reshape the data from (year*gender*MIS) to (year*gender-by-MIS)
reshape wide unemprt_ind, i(year female unemprt_avg slope) j(monis)

// Average over pre- and post-1994 period
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse unemprt_ind1-unemprt_ind8 unemprt_avg slope, by(period female)

// Organize output dataset
sort period female
order period female unemprt_ind1-unemprt_ind8 unemprt_avg slope

label define female 0 "Men" 1 "Women"
label values female female

label define period 1 "1976-1993" 2 "1994-2014"
label value period period

foreach i of numlist 1/8 {
	label var unemprt_ind`i' "Multiplicative indices, MIS`i'"
}
label var unemprt_avg "Average over MIS"
label var slope "Slope of multiplicative indices"

compress
save `outdir'/RGB_CPS_indices_gender.dta, replace


/***********************************************************************************
	Table B2: multiplicative indices of RGB by imputation status.
************************************************************************************/

use `outdir'/data_by_group.dta, clear

* Count by labor force status
collapse (sum) unemp lf unemp_unimp lf_unimp [pw=weight], by(year monis)

// Unemployment rate with and without imputed values
gen unemprt = unemp/lf
gen unemprt_unimp = unemp_unimp/lf_unimp

foreach x of varlist unemprt unemprt_unimp {
	// Calculate average across 8 rotation groups by year
	bysort year: egen double `x'_avg = mean(`x')
	// Calculate multiplicative and additive indices
	gen `x'_ind = `x'/`x'_avg*100
}

// Reshape the data from (year*MIS-by-measure) to (year*MIS*measure)
keep year monis *_ind *_avg

rename unemprt_ind       ind1
rename unemprt_unimp_ind ind2

rename unemprt_avg       avg1
rename unemprt_unimp_avg avg2

reshape long ind avg, i(year monis) j(measure)

gen slope = .
quietly sum year
local minyr = 1982
local maxyr = r(max)
foreach y of numlist `minyr'/`maxyr' {
	foreach i of numlist 1/2 {
		quietly count if year == `y' & measure == `i'
		if (r(N) > 0) {
			quietly reg ind monis if year == `y' & measure == `i'
			replace slope = _b[monis] if year == `y' & measure == `i'
		}
	}
}

// Reshape the data from (year*MIS*measure) to (year*measure-by-MIS)
reshape wide ind, i(year measure avg slope) j(monis)

// Average over pre and post-1994 period
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse avg slope ind1-ind8, by(measure period)

// Organize output data
sort measure period
order measure period ind1-ind8 avg slope

#delimit ;
label define measure
	1 "Unemp rate - all"
	2 "Unemp rate - exld imputed values";
#delimit cr
label values measure measure

label define period 1 "1982-1993" 2 "1994-2014"
label values period period

foreach i of numlist 1/8 {
	label var ind`i' "Multiplicative indices, MIS`i'"
}
label var avg "Average over MIS"
label var slope "Magnitude of RGB"

compress
save `outdir'/RGB_CPS_indices_imputation.dta, replace


/***********************************************************************************
	Table B4: multiplicative indices of RGB by self/proxy responses.
************************************************************************************/

use `outdir'/data_by_group.dta if lfproxy == 1 | lfproxy == 2, clear

// Count the population and by labor force status
collapse (sum) lf unemp popct [pw=weight], by(year monis lfproxy)

gen unemprt = unemp/lf

foreach x of varlist popct unemprt {
	// Calculate average across 8 rotation groups by year
	bysort year lfproxy: egen double `x'_avg = mean(`x')
	// Calculate multiplicative and additive indices
	gen `x'_ind = `x'/`x'_avg*100
}

// Reshape the data from (year*MIS-by-measure) to (year*MIS*measure)
keep year lfproxy monis *_ind *_avg

rename popct_ind   ind1
rename unemprt_ind ind2

rename popct_avg   avg1
rename unemprt_avg avg2

reshape long ind avg, i(year lfproxy monis) j(measure)

// Calculate the magnitude of bias
gen slope = .
sum year
local minyr = r(min)
local maxyr = r(max)
foreach y of numlist `minyr'/`maxyr' {
	foreach i of numlist 1/2 {
		foreach j of numlist 1/2 {
			quietly reg ind monis if year  == `y' & measure == `i' & lfproxy == `j'
			replace slope = _b[monis] if year == `y' & measure == `i' & lfproxy == `j'
		}
	}
}

// Reshape the data from (year*MIS*measure) to (year*measure-by-MIS)
reshape wide ind, i(year lfproxy measure avg slope) j(monis)

// Average over pre and post-1994 period
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse avg slope ind1-ind8, by(measure period lfproxy)

// Organize the data output
sort measure period lfproxy
order measure period lfproxy ind1-ind8 avg slope

#delimit ;
label define measure
	1  "Populaiton count"
	2  "Unemployment rate";
#delimit cr
label values measure measure

label define lfproxy 1 "self" 2 "proxy"
label values lfproxy lfproxy

label define period 1 "1976-1993" 2 "1994-2014"
label values period period

* Present counts in thousands
replace avg = avg/1000 if measure == 1

foreach i of numlist 1/8 {
	label var ind`i' "Multiplicative indices, MIS`i'"
}
label var avg "Average over MIS"
label var slope "Magnitude of RGB"

compress
save `outdir'/RGB_CPS_indices_proxy.dta, replace

* Suspend log file for the boostrapping exercise
log close

/***********************************************************************************
	Bootstrapped SE for analysis by subgroup.
	-> Unemployment questions (Table 3)
	-> Gender (Table B1)
	-> Imputation (Table B2)
	-> Proxy response (Table B4)
************************************************************************************/

// Define parameters
local iteration = 500
local minyr = 1976
local maxyr = 2014

// Create an empty dataset to store RGB of unemployment rate from bootstrapped sample
* By response to employment questions
clear
set obs 1 
gen year = .
gen iteration = .
gen measure = .
gen slope = .
save `outdir'/RGB_CPS_indices_jobsearch_bootstrap.dta, replace
local obscnt1 = 1
* By gender
clear
set obs 1 
gen year = .
gen iteration = .
gen female = .
gen slope = .
save `outdir'/RGB_CPS_indices_gender_bootstrap.dta, replace
local obscnt2 = 1
* By imputation flag of LF status
clear
set obs 1 
gen year = .
gen iteration = .
gen measure = .
gen slope = .
save `outdir'/RGB_CPS_indices_imputation_bootstrap.dta, replace
local obscnt3 = 1
* By proxy response
clear
set obs 1
gen year = .
gen iteration = .
gen measure = .
gen lfproxy = .
gen slope = .
save `outdir'/RGB_CPS_indices_proxy_bootstrap.dta, replace
local obscnt4 = 1

// Draw samples from annual files
foreach i of numlist 1/`iteration' {
	foreach y of numlist `minyr'/`maxyr' {
	
		use `outdir'/data_by_group.dta if year == `y', clear
		
		gen wt = .
		bsample, weight(wt)
		replace wt = wt*weight
		drop weight
		
		save `outdir'/temp.dta, replace
		
		*~~~~~~~~~~ By response to unemployment questions ~~~~~~~~~~
		
		use `outdir'/temp.dta, clear
		
		collapse (sum) popct layofflw_all wrklw_all lkjob4wk_all actsea_all availjoblw_all [pw=wt], by(monis)
		gen measure1 = layofflw_all  /popct*100
		gen measure2 = wrklw_all     /popct*100
		gen measure3 = lkjob4wk_all  /popct*100
		gen measure4 = actsea_all    /popct*100
		gen measure5 = availjoblw_all/popct*100
		
		save `outdir'/temp1.dta, replace
		
		foreach j of numlist 1/5 {
		
			use `outdir'/temp1.dta, clear
			
			// RGB multiplicative indices
			quietly sum measure`j'
			gen ind`j' = measure`j'/r(mean)*100
			quietly count if ind`j' < .
			if (r(N) > 0) {
			
				quietly reg ind`j' monis
			
				// Store bootstrapped values
				use `outdir'/RGB_CPS_indices_jobsearch_bootstrap.dta, clear
				replace year = `y' in `obscnt1'
				replace iteration = `i' in `obscnt1'
				replace measure = `j' in `obscnt1'
				replace slope = _b[monis] in `obscnt1'
			
				// Increment the number observations
				local obscnt1 = `obscnt1' + 1	
				set obs `obscnt1'	// Create a new blank row
			
				save `outdir'/RGB_CPS_indices_jobsearch_bootstrap.dta, replace
				
			}
			
		}
		
		erase `outdir'/temp1.dta
		
		*~~~~~~~~~~ By gender ~~~~~~~~~~
		
		use `outdir'/temp.dta, clear
		
		collapse (sum) unemp lf [pw=wt], by(monis female)
		gen unemprt = unemp/lf
		
		save `outdir'/temp2.dta, replace
			
		foreach j of numlist 0/1 {

			use `outdir'/temp2.dta if female == `j', clear

			// RGB multiplicative indices
			quietly sum unemprt
			gen indices = unemprt/r(mean)*100
			quietly reg indices monis
			
			// Record the bootstraped values
			use `outdir'/RGB_CPS_indices_gender_bootstrap.dta, clear
			replace year = `y' in `obscnt2'
			replace iteration = `i' in `obscnt2'
			replace female = `j' in `obscnt2'
			replace slope = _b[monis] in `obscnt2'
			
			// Increment the number observations in the dataset
			local obscnt2 = `obscnt2' + 1
			set obs `obscnt2'	// Create a new blank row
			
			compress
			save `outdir'/RGB_CPS_indices_gender_bootstrap.dta, replace
			
		}
		
		erase `outdir'/temp2.dta
		
		*~~~~~~~~~~ By imputation ~~~~~~~~~~
		
		// Only available after 1982
		if (`y' >= 1982) {
		
			use `outdir'/temp.dta, clear
		
			// Unemployment rate with or without imputed values
			rename unemp unemp1
			rename lf    lf1
			rename unemp_unimp unemp2
			rename lf_unimp    lf2
	
			collapse (sum) unemp1 lf1 unemp2 lf2 [pw=wt], by(monis)
			gen unemprt1 = unemp1/lf1
			gen unemprt2 = unemp2/lf2
			keep monis unemprt1 unemprt2
		
			save `outdir'/temp3.dta, replace
			
			foreach j of numlist 1/2 {
		
				use `outdir'/temp3.dta, replace
		
				// RGB multiplicative indices
				quietly sum unemprt`j'
				gen indices = unemprt`j'/r(mean)*100
				quietly reg indices monis
			
				// Store bootstrapped values
				use `outdir'/RGB_CPS_indices_imputation_bootstrap.dta, clear
				replace year = `y' in `obscnt3'
				replace iteration = `i' in `obscnt3'
				replace measure = `j' in `obscnt3'
				replace slope = _b[monis] in `obscnt3'
			
				// Increment the number observations
				local obscnt3 = `obscnt3' + 1
				set obs `obscnt3'	// Create a new blank row
			
				save `outdir'/RGB_CPS_indices_imputation_bootstrap.dta, replace
			
			}
		
			erase `outdir'/temp3.dta
			
		}
		
		*~~~~~~~~~~ By proxy response ~~~~~~~~~~
		
		use `outdir'/temp.dta, clear
		
		collapse (sum) popct unemp lf [pw=wt] if lfproxy == 1 | lfproxy == 2, by(monis lfproxy)
		gen unemprt = unemp/lf		
		rename popct   measure1
		rename unemprt measure2
		
		save `outdir'/temp4.dta, replace
			
		foreach j of numlist 1/2 {
		
			foreach k of numlist 1/2 {	// Measures
			
				use `outdir'/temp4.dta if lfproxy == `j', clear

				// RGB multiplicative indices
				quietly sum measure`k'
				gen indices = measure`k'/r(mean)*100
				quietly reg indices monis
			
				// Record the bootstraped values
				use `outdir'/RGB_CPS_indices_proxy_bootstrap.dta, clear
				replace year = `y' in `obscnt4'
				replace iteration = `i' in `obscnt4'
				replace measure = `k' in `obscnt4'
				replace lfproxy = `j' in `obscnt4'
				replace slope = _b[monis] in `obscnt4'
			
				// Increment the number observations in the dataset
				local obscnt4 = `obscnt4' + 1	
				set obs `obscnt4'
				
				compress
				save `outdir'/RGB_CPS_indices_proxy_bootstrap.dta, replace
				
			}			
		}
		
		erase `outdir'/temp4.dta
		
		// Erase data for this year
		erase `outdir'/temp.dta		
		
	}
}

*Re-start the log file
log using `outdir'/RGB_CPS_codes_analysis_REStat.log, append

// Output indices with bootstrapped errors

*~~~~~~~~~~ By response to employment questions ~~~~~~~~~~
use `outdir'/RGB_CPS_indices_jobsearch_bootstrap.dta if year < ., clear
outsheet using `outdir'/RGB_CPS_indices_jobsearch_bootstrap.csv, comma replace

// Calculate average over pre and post 1994 periods
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse slope, by (period measure iteration)

// Calculate boostrapped SE (or SD of bootstrapped values)
collapse slope (sd) slope_se = slope, by (measure period)
rename slope slope_mean
keep measure period slope_mean slope_se

merge 1:1 measure period using `outdir'/RGB_CPS_indices_jobsearch.dta, nogen
order slope_mean slope_se, last
sort measure period
save `outdir'/RGB_CPS_indices_jobsearch.dta, replace
outsheet using `outdir'/RGB_CPS_indices_jobsearch.csv, comma replace

*~~~~~~~~~~ By gender ~~~~~~~~~~
use `outdir'/RGB_CPS_indices_gender_bootstrap.dta if year < ., clear
outsheet using `outdir'/RGB_CPS_indices_gender_bootstrap.csv, comma replace

// Average over pre and post-1994 period
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse slope, by(period female iteration)

// Calcualte bootstrapped SE (or SD of bootstrapped values)
collapse slope (sd) slope_se = slope, by(period female)
rename slope slope_mean
keep period female slope_mean slope_se

// Merge SE with indices
merge 1:1 period female using RGB_CPS_indices_gender.dta, nogen
order slope_mean slope_se, last
save `outdir'/RGB_CPS_indices_gender.dta, replace
outsheet using `outdir'/RGB_CPS_indices_gender.csv, comma replace

*~~~~~~~~~~ By imputation ~~~~~~~~~~
use `outdir'/RGB_CPS_indices_imputation_bootstrap.dta if year < ., replace
outsheet using `outdir'/RGB_CPS_indices_imputation_bootstrap.csv, comma replace

// Average over pre and post-1994 period
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse slope, by(measure period iteration)

// Calculate bootstrapped SE (or SD of bootstrapped values)
collapse slope (sd) slope_se = slope, by(measure period)
rename slope slope_mean
keep measure period slope_mean slope_se

// Merge with the indices
merge 1:1 measure period using `outdir'/RGB_CPS_indices_imputation.dta, nogen
order slope_mean slope_se, last
save `outdir'/RGB_CPS_indices_imputation.dta, replace
outsheet using `outdir'/RGB_CPS_indices_imputation.csv, comma replace

*~~~~~~~~~~ By proxy response ~~~~~~~~~~

use `outdir'/RGB_CPS_indices_proxy_bootstrap.dta if year < ., clear
outsheet using `outdir'/RGB_CPS_indices_proxy_bootstrap.csv, comma replace

// Average over pre and post-1994 period
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse slope, by(measure period lfproxy iteration)

// Calculate bootstrapped SE (or SE of bootstrapped values)
collapse slope (sd) slope_se = slope, by(measure period lfproxy)
rename slope slope_mean
keep measure period lfproxy slope_mean slope_se

// Merge with indices
merge 1:1 measure period lfproxy using `outdir'/RGB_CPS_indices_proxy.dta, nogen
order slope_mean slope_se, last
save `outdir'/RGB_CPS_indices_proxy.dta, replace
outsheet using `outdir'/RGB_CPS_indices_proxy.csv, comma replace

// Erase individual-level data
erase `outdir'/data_by_group.dta

/***********************************************************************************
	Table 1, Figure 3, and Figure B3: Time series analysis of RGB and type-A non-interview.
************************************************************************************/

foreach y of numlist 1976/2014 {

	di "Year `y'"
	use `datadir'/base`y'ax.dta, clear
	
	/* Non-interview is a household status. 
	   Obtain a sample of households instead of individuals. */
	
	* 1998-2014, use household ID (only valid within a month)
	if `y' >= 1998 & `y' <= 2014 {
	
		// Generate a unique household ID in the annual data
		sort year intmonth monis hhidm
		by year intmonth monis hhidm: gen hhid = 1 if _n == 1
		replace hhid = sum(hhid)
		save `outdir'/temp`y'.dta, replace
		
		// Household-level data for non-response
		gen refusal = (noninta94 == 3)
		gen noncontact = (noninta94 == 1 | noninta94 == 2)
		collapse hhtypeA hhtypeB hhtypeC refusal noncontact, by(year intmonth monis hhid)
		save `outdir'/temp`y'_hh.dta, replace
			
	}
	* 1994-1997 also need part II of household ID which includes serial suffix and sample identifer
	if `y' >= 1996 & `y' <= 1997 {
	
		// Generate a unique household ID in the annual data
		sort year intmonth monis hhid1 hhid2 hhid04 hhnum
		by year intmonth monis hhid1 hhid2 hhid04 hhnum: gen hhid = 1 if _n == 1
		replace hhid = sum(hhid)
		save `outdir'/temp`y'.dta, replace
	
		// Household-level data for non-response
		gen refusal = (noninta94 == 3)
		gen noncontact = (noninta94 == 1 | noninta94 == 2)
		collapse hhtypeA hhtypeB hhtypeC refusal noncontact, by(year intmonth monis hhid)
		save `outdir'/temp`y'_hh.dta, replace
		
	}
	* 1994 and 1995 need "state" to uniquely identify households
	if `y' >= 1994 & `y' <= 1995 {
		drop if intmonth >= 6 & intmonth <= 8
		
		// Generate a unique household ID in the annual data
		sort year intmonth monis state hhid1 hhid2 hhid04 hhnum
		by year intmonth monis state hhid1 hhid2 hhid04 hhnum: gen hhid = 1 if _n == 1
		replace hhid = sum(hhid)
		save `outdir'/temp`y'.dta, replace
		
		// Household-level data for non-response
		gen refusal = (noninta94 == 3)
		gen noncontact = (noninta94 == 1 | noninta94 == 2)
		collapse hhtypeA hhtypeB hhtypeC refusal noncontact, by(year intmonth monis hhid)
		save `outdir'/temp`y'_hh.dta, replace
		
	}
	* Rest use household ID and household number
	if `y' >= 1989 & `y' <= 1993 {
	
		// Generate a unique household ID in the annual data
		sort year intmonth monis hhid1 hhid2 hhnum
		by year intmonth monis hhid1 hhid2 hhnum: gen hhid = 1 if _n == 1
		replace hhid = sum(hhid)
		save `outdir'/temp`y'.dta, replace
		
		// Household-level data for non-response
		gen refusal = (noninta == 3)
		gen noncontact = (noninta == 1 | noninta == 2)
		collapse hhtypeA hhtypeB hhtypeC refusal noncontact, by(year intmonth monis hhid)
		save `outdir'/temp`y'_hh.dta, replace
	}
	* Change format before 1989
	if `y' >= 1976 & `y' <= 1988 {

		// drop children and military sample
		drop if rectype == 4 | rectype == 5
		
		// Generate a unique household ID in the annual data
		sort year intmonth monis hhid1 hhid2 hhnum
		by year intmonth monis hhid1 hhid2 hhnum: gen hhid = 1 if _n == 1
		replace hhid = sum(hhid)
		save `outdir'/temp`y'.dta, replace
		
		// Household-level data for non-response
		gen hhtypeA = (noninta < .)
		gen hhtypeB = (nonintb < .)
		gen hhtypeC = (nonintc < .)
		gen refusal = (noninta == 3)
		gen noncontact = (noninta == 1 | noninta == 2)
		collapse hhtypeA hhtypeB hhtypeC refusal noncontact, by(year intmonth monis hhid)
		save `outdir'/temp`y'_hh.dta, replace
		
	}
	
	// Keep necessary data in the individual data
	use `outdir'/temp`y'.dta, clear
	keep year intmonth monis hhid unemp lf weight
	save `outdir'/temp`y'.dta, replace
	
}

// Combine annual individual data
use `outdir'/temp2014.dta, clear
foreach y of numlist 1976/2013 {
	append using `outdir'/temp`y'.dta
}
* Save the data
compress
save `outdir'/data_by_individual.dta, replace

// Combine annual household data
use `outdir'/temp2014_hh.dta, clear
foreach y of numlist 1976/2013 {
	append using `outdir'/temp`y'_hh.dta
}
* Keep observations with unique household ID (the indicator for non-interivew should be either 0 or 1)
drop if hhtypeA > 0 & hhtypeA < 1
drop if hhtypeB > 0 & hhtypeB < 1
drop if hhtypeC > 0 & hhtypeC < 1
* Save household-level data on non-interview
compress
save `outdir'/data_by_household.dta, replace

// Erase annual data
foreach y of numlist 1976/2014 {
	erase `outdir'/temp`y'.dta
	erase `outdir'/temp`y'_hh.dta
}

// Calculate Non-interview rates by year
use `outdir'/data_by_household.dta, clear
collapse hhtypeA hhtypeB hhtypeC refusal noncontact, by(year monis)
gen nonintart = hhtypeA/(1-hhtypeB-hhtypeC)*100
gen refusalrt = refusal/(1-hhtypeB-hhtypeC)*100
gen noncontactrt = noncontact/(1-hhtypeB-hhtypeC)*100

rename nonintart rate1
rename refusalrt rate2
rename noncontactrt rate3

replace rate1 = . if rate1 == 0
replace rate2 = . if rate2 == 0
replace rate3 = . if rate3 == 0

keep year monis rate1 rate2 rate3

save `outdir'/temp.dta, replace

// Calculate annual non-interview rate for type-A non-interview
collapse rate1 rate2 rate3, by(year)

// Fill in missing values
* 1976, 1977, 1980, 1981 - fill in base on Fig16-2 of CPS TP#66
replace rate1 = 4.4 if year == 1976
replace rate2 = 2.7 if year == 1976
replace rate3 = .   if year == 1976
replace rate1 = 4.1 if year == 1977
replace rate2 = 2.6 if year == 1977
replace rate3 = .   if year == 1977
* 1978 not quite right
replace rate1 = 4.6 if year == 1978
replace rate2 = 2.6 if year == 1978
replace rate3 = .   if year == 1978
replace rate1 = 4.0 if year == 1980
replace rate2 = 2.5 if year == 1980
replace rate3 = .   if year == 1980
replace rate1 = 4.0 if year == 1981
replace rate2 = 2.5 if year == 1981
replace rate3 = .   if year == 1981
* rate of type A other than refusal
gen rate4 = rate1-rate2-rate3

label var rate1 "typeA/(typeA+interview)"
label var rate2 "refusal/(typeA+interview)"
label var rate3 "noncontact/(typeA+interview)"
label var rate4 "Type A other than refusal and noncontact"

compress
save `outdir'/RGB_CPS_nonresp.dta, replace

// Figure 3: Time trend of type-A non-interview rate
#delimit ;
line rate1 rate2 rate3 year,
	xtitle("Year")
	xlabel(1975(5)2015)
	ytitle("Rate of nonresponse")
	lp (solid dash dash_dot)
	legend(order(1 "Type A noninterview" 2 "Refusal" 3 "Noncontact") rows(1))
	saving(`outdir'/RGB_CPS_noninta_byyear.gph, replace);
#delimit cr
graph export `outdir'/RGB_CPS_noninta_byyear.pdf, replace

************* Relationship between bias and non-interivew *************
// Get bias measure of unemployment rate(slope)
use year slope_ind using RGB_CPS_annual_indices, clear
merge 1:1 year using `outdir'/RGB_CPS_nonresp.dta, nogen
save `outdir'/RGB_CPS_bias_nonresp.dta, replace

// Table 1: Regression analysis to see the time trend in non-interview
// 	controlling for type-A non-interview rate.
gen time = year-1993
gen post93 = (year >= 1994)
gen post93_time = post93 * time

* OLS
reg slope time post93
outreg2 using RGB_CPS_bias_typeA, bdec(3) excel replace
reg slope time post93 post93_time
outreg2 using RGB_CPS_bias_typeA, bdec(3) excel
reg slope time post93 rate1
outreg2 using RGB_CPS_bias_typeA, bdec(3) excel
reg slope time post93 post93_time rate1
outreg2 using RGB_CPS_bias_typeA, bdec(3) excel

// Figure B3: magnitude of bias and type-A non-interview rate
* Type-A non-interview
#delimit ;
twoway (scatter slope_ind rate1 if year<1994, m(X) mlabel(year))
	   (scatter slope_ind rate1 if year>=1994, mlabel(year)),
	xtitle("Rate of non-interview")
	ytitle("Magnitude of bias")
	title("(a) Type-A")
	legend(order(1 "1976-1993" 2 "1994-2014"))
	saving(`outdir'/bias_typeA, replace);
#delimit cr
* Refusal rate
#delimit ;
twoway (scatter slope_ind rate2 if year<1994, m(X) mlabel(year))
	   (scatter slope_ind rate2 if year>=1994, mlabel(year)),
	xtitle("Rate of non-interview")
	ytitle("Magnitude of bias")
	title("(b) Refusal")
	legend(order(1 "1976-1993" 2 "1994-2014"))
	saving(`outdir'/bias_refusal, replace);
#delimit cr
* Non-contact rate
#delimit ;
twoway (scatter slope_ind rate3 if year<1994, m(X) mlabel(year))
	   (scatter slope_ind rate3 if year>=1994, mlabel(year)),
	xtitle("Rate of non-interview")
	ytitle("Magnitude of bias")
	legend(order(1 "1976-1993" 2 "1994-2014"))
	title("(c) Noncontact")
	xlabel(1[0.5]3.5)
	saving(`outdir'/bias_noncontact, replace);
#delimit cr
* Rate of other type-A non-interview
#delimit ;
twoway (scatter slope_ind rate4 if year<1994, m(X) mlabel(year))
	   (scatter slope_ind rate4 if year>=1994, mlabel(year)),
	xtitle("Rate of non-interview")
	ytitle("Magnitude of bias")
	legend(order(1 "1976-1993" 2 "1994-2014"))
	title("(d) Other")
	xlabel(0[0.2]1.2)
	saving(`outdir'/bias_Aother, replace);
#delimit cr

// Combine four figures
#delimit ;
graph combine `outdir'/bias_typeA.gph
              `outdir'/bias_refusal.gph
	          `outdir'/bias_noncontact.gph
	          `outdir'/bias_Aother.gph, 
			  cols(2) iscale(0.7) imargin(0 0 0 0)
			  saving(`outdir'/RGB_CPS_bias_typeA.gph, replace);
#delimit cr
graph export `outdir'/RGB_CPS_bias_typeA.pdf, replace

* Erase four figures
erase `outdir'/bias_typeA.gph
erase `outdir'/bias_refusal.gph
erase `outdir'/bias_noncontact.gph
erase `outdir'/bias_Aother.gph

erase `outdir'/data_by_household.dta
erase `outdir'/data_by_individual.dta


/***********************************************************************************
	Table B3: Time series analysis of RGB and economic conditions.
************************************************************************************/

* Real per-capita GDP
clear
input year rGDPpp_BEA
1975 24934
1976 26023
1977 26950
1978 28149
1979 28725
1980 28326
1981 28772
1982 27954
1983 28983
1984 30816
1985 31838
1986 32659
1987 33488
1988 34580
1989 35517
1990 35795
1991 35295
1992 36067
1993 36579
1994 37597
1995 38166
1996 39155
1997 40426
1998 41736
1999 43195
2000 44474
2001 44464
2002 44829
2003 45662
2004 46966
2005 48089
2006 48905
2007 49300
2008 48699
2009 46930
2010 47719
2011 48116
2012 48869
2013 49584
2014 50397
end

sort year
gen rGDPpp_BEA_growth = (rGDPpp_BEA/rGDPpp_BEA[_n-1]-1)*100
keep year rGDPpp_BEA_growth
keep if year >= 1976 & year <= 2014
label var rGDPpp_BEA_growth "Growth in real per-capita GDP"
save rGDPpp_BEA_growth.dta, replace

// Growth of unemployment rate
* Read in annual unemployment rate from BLS
clear
input year unemprt_BLS
1975 8.5
1976 7.7
1977 7.1
1978 6.1
1979 5.8
1980 7.1
1981 7.6
1982 9.7
1983 9.6
1984 7.5
1985 7.2
1986 7.0
1987 6.2
1988 5.5
1989 5.3
1990 5.6
1991 6.8
1992 7.5
1993 6.9
1994 6.1
1995 5.6
1996 5.4
1997 5.0
1998 4.5
1999 4.2
2000 4.0
2001 4.7
2002 5.8
2003 6.0
2004 5.5
2005 5.1
2006 4.6
2007 4.6
2008 5.8
2009 9.3
2010 9.6
2011 9.0
2012 8.1
2013 7.4
2014 6.2
end

sort year
gen unemprt_BLS_growth = (unemprt_BLS/unemprt_BLS[_n-1]-1)*100
keep year unemprt_BLS_growth
keep if year >= 1976 & year <= 2014
label var unemprt_BLS_growth "Growth of unemployment rate"
save unemprt_BLS_growth.dta, replace

* Merge economic conditions with the magnitude of rotation group bias
use year slope_ind using `outdir'/RGB_CPS_annual_indices.dta, clear
merge 1:1 year using `outdir'/rGDPpp_BEA_growth.dta, nogen
merge 1:1 year using `outdir'/unemprt_BLS_growth.dta, nogen

gen time = year-1993
gen post93 = (year > 1993)
gen post93_time = post93 * time

** OLS
* Baseline regression: no economic conditions
quietly reg slope time post93
outreg2 using RGB_CPS_bias_econ, bdec(3) excel replace
quietly reg slope time post93 post93_time
outreg2 using RGB_CPS_bias_econ, bdec(3) excel
* Include GDP growth
quietly reg slope time post93 rGDPpp_BEA_growth
outreg2 using RGB_CPS_bias_econ, bdec(3) excel
quietly reg slope time post93 post93_time rGDPpp_BEA_growth
outreg2 using RGB_CPS_bias_econ, bdec(3) excel
* Include unemployment rate growth
quietly reg slope time post93 unemprt_BLS_growth
outreg2 using RGB_CPS_bias_econ, bdec(3) excel
quietly reg slope time post93 post93_time unemprt_BLS_growth
outreg2 using RGB_CPS_bias_econ, bdec(3) excel
* Include both growth in GDP and unemployment rate
quietly reg slope time post93 rGDPpp_BEA_growth unemprt_BLS_growth
outreg2 using RGB_CPS_bias_econ, bdec(3) excel
quietly reg slope time post93 post93_time rGDPpp_BEA_growth unemprt_BLS_growth
outreg2 using RGB_CPS_bias_econ, bdec(3) excel

/***********************************************************************************
	Figure B5: Time series analysis of RGB and unemployment duration.
************************************************************************************/
clear

// Median unemployemnt duration from BLS
input year medunempdur_BLS
1976 8.1
1977 7.1
1978 6.0
1979 5.6
1980 6.7
1981 7.0
1982 8.8
1983 10.2
1984 7.9
1985 6.9
1986 7.0
1987 6.5
1988 6.0
1989 5.2
1990 5.4
1991 6.9
1992 8.7
1993 8.3
1994 9.1
1995 8.2
1996 8.2
1997 7.9
1998 6.7
1999 6.3
2000 5.9
2001 6.7
2002 9.2
2003 10.2
2004 9.8
2005 8.9
2006 8.2
2007 8.5
2008 9.4
2009 15.7
2010 21.5
2011 21.4
2012 19.1
2013 16.8
2014 14.2
end
save `outdir'/medunempdur_BLS.dta, replace

// Merge medican unemployment duration with magnitude of rotation group bias
use year slope_ind using RGB_CPS_annual_indices.dta, clear
merge 1:1 year using `outdir'/medunempdur_BLS.dta, nogen
erase `outdir'/medunempdur_BLS.dta

// Scatter plot of the magnitude of RGB and unemployment duration
#delimit ;
twoway (scatter slope_ind medunempdur_BLS if year >= 1994, mlabel(year)),
	xtitle("Median Unemployment Duration")
	ytitle("Magnitude of bias")
	title("Magnitude of RGB and Unemp Duration, 1994-2014")
	saving(`outdir'/RGB_CPS_bias_unempdur.gph, replace);
#delimit cr
graph export `outdir'/RGB_CPS_bias_unempdur.pdf, replace


log close
set more on
